import mdp.utils
from numpy import *
#Written 26 Jan 2009 by Alex O. Holcombe, http://www.psych.usyd.edu.au/staff/alexh/
def collapseBy(data,DV,*factors):
    """Collapse `data` across all columns except `factors` and summarise `DV`.

    For every combination of the levels of the given factors, the rows of
    `data` that carry exactly that combination are selected and the mean,
    standard deviation and number of observations of column `DV` are
    computed for them.

    Parameters:
        data: table indexable by column name, e.g. a numpy structured array
            such as the one returned by loadtxt with named fields. It must
            contain `DV` and every name in `factors`.
        DV: name of the dependent-variable column to summarise.
        *factors: names of the columns to keep as grouping factors. May be
            empty, in which case statistics over all rows are returned as
            0-dimensional arrays.

    Returns:
        (avgs, stddevs, ns): three float arrays with one axis per factor,
        in the order the factors were passed. Position j along axis d
        corresponds to the j-th level returned by mdp.utils.uniq for factor
        d. `stddevs` uses numpy's default `std` (ddof=0). `ns` holds the row
        count of each cell. Cells without any rows have NaN in `avgs` and
        `stddevs` and 0 in `ns`. The standard error is not returned; it can
        be derived by the caller as stddevs / sqrt(ns).
    """
    # The levels of each factor (and their order along the matching output
    # axis) are whatever mdp.utils.uniq reports for that column.
    factorVals = [mdp.utils.uniq(data[name]) for name in factors]
    dimSizes = tuple(len(levels) for levels in factorVals)

    dvColumn = asarray(data[DV])

    # One boolean row mask per (factor, level). Built once here rather than
    # once per combination; boolean masks also behave correctly when a level
    # matches exactly one row (squeezing a one-element index array yields a
    # 0-d array that cannot be iterated).
    levelMasks = [
        [asarray(data[name]) == level for level in levels]
        for name, levels in zip(factors, factorVals)
    ]

    avgs = full(dimSizes, nan)
    stddevs = full(dimSizes, nan)
    ns = zeros(dimSizes)

    # ndindex walks every index combination as a tuple, which is exactly the
    # form needed to address a single cell of the result arrays. With no
    # factors it yields one empty tuple, addressing the single 0-d cell.
    for combo in ndindex(*dimSizes):
        rowMask = ones(dvColumn.shape, dtype=bool)
        for masks, levelIdx in zip(levelMasks, combo):
            rowMask &= masks[levelIdx]  # rows must match every factor
        cell = dvColumn[rowMask]
        ns[combo] = cell.size
        if cell.size:
            # Empty cells keep their NaN fill; skipping them avoids numpy's
            # "mean of empty slice" warnings without changing the result.
            avgs[combo] = mean(cell)
            stddevs[combo] = std(cell)
    return avgs,stddevs,ns